/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "byteorder.h"

#include "wbyte.h"

/*
 * Evaluate how many bytes the character starting at 'begin' occupies.
 *
 * charset : WBYTE_CHARSET_GB18030 or WBYTE_CHARSET_UTF8.
 * begin   : first byte of the character; it is always read, so the caller
 *           must pass a readable byte here.
 * end     : LAST readable byte of the buffer (inclusive, not one-past-end);
 *           bytes beyond it are treated as absent.
 *
 * Returns
 *    1 : the first byte is below 0x80 (checked before 'charset' is examined)
 *    2, 3, 4 : length of a recognized multi-byte sequence
 *    0 : the bytes do not form a sequence recognized for 'charset'
 *   -1 : GB18030 only, the second byte is absent or is 0x00
 *   -2 : GB18030 only, not a two-byte sequence and fewer than four bytes
 *        are available
 *   -4 : 'charset' is neither of the two supported values
 */
int EvaluateOneWbyteLength( uint16_t charset , unsigned char *begin , unsigned char *end )
{
	unsigned char	*p = begin ;
	
	if( (*p) < 0x80 )
		return 1;
	
	if( charset == WBYTE_CHARSET_GB18030 )
	{
		/*
		 * Four-byte code ranges accepted as one character. Both bounds are
		 * inclusive and are written as the big-endian numeric value of the
		 * four bytes, i.e. byte0<<24 | byte1<<16 | byte2<<8 | byte3.
		 */
		static const struct
		{
			uint32_t	first ;
			uint32_t	last ;
		} four_byte_ranges[] = {
			{ 0x81318132u , 0x81319934u } ,
			{ 0x8430BA32u , 0x8430FE35u } ,
			{ 0x84318730u , 0x84319530u } ,
			{ 0x8132E834u , 0x8132FD31u } ,
			{ 0x9034C538u , 0x9034C730u } ,
			{ 0x8134F434u , 0x8134F830u } ,
			{ 0x8134F932u , 0x81358437u } ,
			{ 0x81358B32u , 0x81359935u } ,
			{ 0x82359833u , 0x82369435u } ,
			{ 0x82369535u , 0x82369A32u } ,
			{ 0x81339D36u , 0x8133B635u } ,
			{ 0x8139A933u , 0x8139B734u } ,
			{ 0x8237CF35u , 0x8336BE36u } ,
			{ 0x9232C636u , 0x9232D635u } ,
			{ 0x81398B32u , 0x8139A135u } ,
			{ 0x8139EE39u , 0x82358738u } ,
			{ 0x82358F33u , 0x82359636u } ,
			{ 0x95328236u , 0x9835F336u } ,
			{ 0x9835F738u , 0x98399E36u } ,
			{ 0x98399F38u , 0x9839B539u } ,
			{ 0x9839B632u , 0x9933FE33u } ,
			{ 0x99348138u , 0x9939F730u }
		} ;
		unsigned char	c1 ;
		unsigned char	c2 ;
		uint32_t	u4b ;
		unsigned int	i ;
		
		/*
		 * Assemble the four bytes one at a time instead of loading them
		 * through a uint32_t pointer: this works at any alignment and gives
		 * the same numeric value on every host byte order, so the table
		 * above can be compared without any byte-order conversion.
		 * 0 means "fewer than four bytes available"; a real value can never
		 * be 0 here because the first byte is >= 0x80.
		 */
		if( end - p >= 3 )
		{
			u4b =	( (uint32_t)p[0] << 24 )
				| ( (uint32_t)p[1] << 16 )
				| ( (uint32_t)p[2] << 8 )
				| (uint32_t)p[3] ;
		}
		else
		{
			u4b = 0 ;
		}
		
		c1 = ( end - p >= 0 ) ? p[0] : '\0' ;
		c2 = ( end - p >= 1 ) ? p[1] : '\0' ;
		if( c2 == '\0' )
			return -1;
		
		/*
		 * Two-byte form: lead byte 0x81..0xFE followed by a trail byte in
		 * 0x40..0x7E or 0x80..0xFE. This is the exact union of the separate
		 * lead/trail regions (0x81-0xA0, 0xA1-0xA7, 0xA8-0xA9, 0xAA-0xAF,
		 * 0xB0-0xF7, 0xF8-0xFE crossed with the low and high trail ranges).
		 */
		if( 0x81<=c1 && c1<=0xFE && 0x40<=c2 && c2<=0xFE && c2!=0x7F )
		{
			return 2;
		}
		
		if( u4b == 0 )
		{
			return -2;
		}
		
		for( i = 0 ; i < sizeof(four_byte_ranges)/sizeof(four_byte_ranges[0]) ; i++ )
		{
			if( four_byte_ranges[i].first<=u4b && u4b<=four_byte_ranges[i].last )
				return 4;
		}
		
		return 0;
	}
	else if( charset == WBYTE_CHARSET_UTF8 )
	{
		unsigned char	c1 ;
		unsigned char	c2 ;
		unsigned char	c3 ;
		unsigned char	c4 ;
		
		/* Bytes past 'end' read as 0x00, which never matches a 10xxxxxx continuation byte. */
		c1 = ( end - p >= 0 ) ? p[0] : '\0' ;
		c2 = ( end - p >= 1 ) ? p[1] : '\0' ;
		c3 = ( end - p >= 2 ) ? p[2] : '\0' ;
		c4 = ( end - p >= 3 ) ? p[3] : '\0' ;
		
		/* 110xxxxx 10xxxxxx */
		if( (c1>>5) == 0x06 && (c2>>6) == 0x02 )
		{
			return 2;
		}
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		else if( (c1>>4) == 0x0E && (c2>>6) == 0x02 && (c3>>6) == 0x02 )
		{
			return 3;
		}
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		else if( (c1>>3) == 0x1E && (c2>>6) == 0x02 && (c3>>6) == 0x02 && (c4>>6) == 0x02 )
		{
			return 4;
		}
		else
		{
			return 0;
		}
	}
	else
	{
		return -4;
	}
}

